This script runs without external data; it only uses the datasets that ship with quanteda. If you have trouble setting your working directory, please ask on Slack and we will help you get ready for the next session, in which we will have to load external data.
## load required libraries
library(tidyverse)
library(quanteda)
library(lexicon)
library(reshape2)
library(stringi)
library(quanteda.textplots)
library(gridExtra)## clean workspace
## remove every object from the global environment so the lesson starts clean
rm(list = ls())

## This step is only for those who use their own corpora.
## set working directory (WD)
path <- '~/coliphi21/practice_lessons/lesson_1/src/'
## The lesson itself needs no external files, so a missing directory must not
## abort the script: only change directory when it actually exists.
if (dir.exists(path)) {
  setwd(path)
} else {
  message('Skipping setwd(): directory not found: ', path)
}
## Quickfix: setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

## For this tutorial we will use the quanteda corpus data_corpus_inaugural,
## containing the inaugural addresses of US presidents since 1789. For the
## next session, you will be able to work with your own data.
## bind the inaugural-address corpus that ships with quanteda to a short name
df <- data_corpus_inaugural
## what does the corpus object look like?
## (typing the name at top level prints a preview of the first documents)
df
## Corpus consisting of 59 documents and 4 docvars.
## 1789-Washington :
## "Fellow-Citizens of the Senate and of the House of Representa..."
##
## 1793-Washington :
## "Fellow citizens, I am again called upon by the voice of my c..."
##
## 1797-Adams :
## "When it was first perceived, in early times, that no middle ..."
##
## 1801-Jefferson :
## "Friends and Fellow Citizens: Called upon to undertake the du..."
##
## 1805-Jefferson :
## "Proceeding, fellow citizens, to that qualification which the..."
##
## 1809-Madison :
## "Unwilling to depart from examples of the most revered author..."
##
## [ reached max_ndoc ... 53 more documents ]
## summary statistics (first rows only)
head(summary(df))
## which class(es) does the object have?
class(df)
## [1] "corpus" "character"
## how much memory does it occupy?
object.size(df)
## 838560 bytes
## what does the underlying data structure look like?
str(df)
## 'corpus' Named chr [1:59] "Fellow-Citizens of the Senate and of the House of Representatives:\n\nAmong the vicissitudes incident to life n"| __truncated__ "Fellow citizens, I am again called upon by the voice of my country to execute the functions of its Chief Magist"| __truncated__ "When it was first perceived, in early times, that no middle course for America remained between unlimited submi"| __truncated__ "Friends and Fellow Citizens:\n\nCalled upon to undertake the duties of the first executive office of our countr"| __truncated__ "Proceeding, fellow citizens, to that qualification which the Constitution requires before my entrance on the ch"| __truncated__ "Unwilling to depart from examples of the most revered authority, I avail myself of the occasion now presented t"| __truncated__ "About to add the solemnity of an oath to the obligations imposed by a second call to the station in which my co"| __truncated__ ...
## - attr(*, "names")= chr [1:59] "1789-Washington" "1793-Washington" "1797-Adams" "1801-Jefferson" ...
## - attr(*, "docvars")='data.frame': 59 obs. of 7 variables:
## ..$ docname_ : chr [1:59] "1789-Washington" "1793-Washington" "1797-Adams" "1801-Jefferson" ...
## ..$ docid_ : Factor w/ 59 levels "1789-Washington",..: 1 2 3 4 5 6 7 8 9 10 ...
## ..$ segid_ : int [1:59] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ Year : int [1:59] 1789 1793 1797 1801 1805 1809 1813 1817 1821 1825 ...
## ..$ President: chr [1:59] "Washington" "Washington" "Adams" "Jefferson" ...
## ..$ FirstName: chr [1:59] "George" "George" "John" "Thomas" ...
## ..$ Party : Factor w/ 6 levels "Democratic","Democratic-Republican",..: 4 4 3 2 2 2 2 2 2 2 ...
## - attr(*, "meta")=List of 3
## ..$ system:List of 5
## .. ..$ package-version:Classes 'package_version', 'numeric_version' hidden list of 1
## .. .. ..$ : int [1:3] 3 0 0
## .. ..$ r-version :Classes 'R_system_version', 'package_version', 'numeric_version' hidden list of 1
## .. .. ..$ : int [1:3] 4 0 2
## .. ..$ system : Named chr [1:3] "Darwin" "x86_64" "smueller"
## .. .. ..- attr(*, "names")= chr [1:3] "sysname" "machine" "user"
## .. ..$ directory : chr "/Users/smueller/Documents/GitHub/quanteda"
## .. ..$ created : Date[1:1], format: "2021-08-09"
## ..$ object:List of 2
## .. ..$ unit : chr "documents"
## .. ..$ summary:List of 2
## .. .. ..$ hash: chr(0)
## .. .. ..$ data: NULL
## ..$ user :List of 6
## .. ..$ description: chr "Transcripts of all inaugural addresses delivered by United States Presidents, from Washington 1789 onward. Dat"| __truncated__
## .. ..$ source : chr "Gerhard Peters and John T. Woolley. The American Presidency Project."
## .. ..$ url : chr "https://www.presidency.ucsb.edu/documents/presidential-documents-archive-guidebook/inaugural-addresses"
## .. ..$ author : chr "(various US Presidents)"
## .. ..$ keywords : chr [1:5] "political" "US politics" "United States" "presidents" ...
## .. ..$ title : chr "US presidential inaugural address speeches"
## the document-level variables (first rows only)
docvars(df) %>% head

## open the help page for table(); it is needed for the exercise that follows.
## This must be its own statement: written as `head?table`, R parses the whole
## line as a single binary `?` help lookup and never prints the docvars.
?table
## Help on topic 'table' was found in the following packages:
##
## Package Library
## vctrs /home/redapemusic35/R/x86_64-pc-linux-gnu-library/4.0
## base /usr/lib/R/library
Compute the number of Democratic and Republican speeches. Hint: check out the table() function.
## count the speeches per party using the Party document variable
table(docvars(df, 'Party'))
##
## Democratic Democratic-Republican Federalist none Republican Whig
## 22 7 1 2 24 3
## text data: how can we look at Biden's 2021 speech?
## convert the corpus to a character vector of texts and list its names
txt <- as.character(df)
names(txt)
## [1] "1789-Washington" "1793-Washington" "1797-Adams" "1801-Jefferson" "1805-Jefferson" "1809-Madison" "1813-Madison" "1817-Monroe" "1821-Monroe" "1825-Adams" "1829-Jackson" "1833-Jackson" "1837-VanBuren" "1841-Harrison" "1845-Polk" "1849-Taylor" "1853-Pierce" "1857-Buchanan" "1861-Lincoln" "1865-Lincoln" "1869-Grant" "1873-Grant" "1877-Hayes" "1881-Garfield" "1885-Cleveland" "1889-Harrison" "1893-Cleveland" "1897-McKinley" "1901-McKinley" "1905-Roosevelt" "1909-Taft" "1913-Wilson" "1917-Wilson" "1921-Harding" "1925-Coolidge" "1929-Hoover" "1933-Roosevelt" "1937-Roosevelt" "1941-Roosevelt" "1945-Roosevelt" "1949-Truman" "1953-Eisenhower" "1957-Eisenhower" "1961-Kennedy" "1965-Johnson" "1969-Nixon" "1973-Nixon" "1977-Carter" "1981-Reagan" "1985-Reagan" "1989-Bush" "1993-Clinton" "1997-Clinton" "2001-Bush" "2005-Bush"
## [56] "2009-Obama" "2013-Obama" "2017-Trump" "2021-Biden"
## subsetting data by logical vectors
## TRUE keeps the element at that position, FALSE drops it
c('hi', 'hello', 'bye')[c(TRUE, FALSE, TRUE)]
## [1] "hi" "bye"
## an index longer than the vector yields NA for the surplus position
c('hi', 'hello', 'bye')[c(TRUE, FALSE, TRUE, TRUE)]
## [1] "hi" "bye" NA
## an index shorter than the vector is recycled (here: TRUE, FALSE, TRUE)
c('hi', 'hello', 'bye')[c(TRUE, FALSE)]
## [1] "hi" "bye"
## keep only the documents whose name contains "Biden" and print the text
biden <- txt[grepl(pattern = 'Biden', x = names(txt))]
cat(biden)
## Chief Justice Roberts, Vice President Harris, Speaker Pelosi, Leader Schumer, Leader McConnell, Vice President Pence, distinguished guests, and my fellow Americans.
##
## This is America's day.
##
## This is democracy's day.
##
## A day of history and hope.
##
## Of renewal and resolve.
##
## Through a crucible for the ages America has been tested anew and America has risen to the challenge.
##
## Today, we celebrate the triumph not of a candidate, but of a cause, the cause of democracy.
##
## The will of the people has been heard and the will of the people has been heeded.
##
## We have learned again that democracy is precious.
##
## Democracy is fragile.
##
## And at this hour, my friends, democracy has prevailed.
##
## So now, on this hallowed ground where just days ago violence sought to shake this Capitol's very foundation, we come together as one nation, under God, indivisible, to carry out the peaceful transfer of power as we have for more than two centuries.
##
## We look ahead in our uniquely American way – restless, bold, optimistic – and set our sights on the nation we know we can be and we must be.
##
## I thank my predecessors of both parties for their presence here.
##
## I thank them from the bottom of my heart.
##
## You know the resilience of our Constitution and the strength of our nation.
##
## As does President Carter, who I spoke to last night but who cannot be with us today, but whom we salute for his lifetime of service.
##
## I have just taken the sacred oath each of these patriots took — an oath first sworn by George Washington.
##
## But the American story depends not on any one of us, not on some of us, but on all of us.
##
## On "We the People" who seek a more perfect Union.
##
## This is a great nation and we are a good people.
##
## Over the centuries through storm and strife, in peace and in war, we have come so far. But we still have far to go.
##
## We will press forward with speed and urgency, for we have much to do in this winter of peril and possibility.
##
## Much to repair.
##
## Much to restore.
##
## Much to heal.
##
## Much to build.
##
## And much to gain.
##
## Few periods in our nation's history have been more challenging or difficult than the one we're in now.
##
## A once-in-a-century virus silently stalks the country.
##
## It's taken as many lives in one year as America lost in all of World War II.
##
## Millions of jobs have been lost.
##
## Hundreds of thousands of businesses closed.
##
## A cry for racial justice some 400 years in the making moves us. The dream of justice for all will be deferred no longer.
##
## A cry for survival comes from the planet itself. A cry that can't be any more desperate or any more clear.
##
## And now, a rise in political extremism, white supremacy, domestic terrorism that we must confront and we will defeat.
##
## To overcome these challenges – to restore the soul and to secure the future of America – requires more than words.
##
## It requires that most elusive of things in a democracy:
##
## Unity.
##
## Unity.
##
## In another January in Washington, on New Year's Day 1863, Abraham Lincoln signed the Emancipation Proclamation.
##
## When he put pen to paper, the President said, "If my name ever goes down into history it will be for this act and my whole soul is in it."
##
## My whole soul is in it.
##
## Today, on this January day, my whole soul is in this:
##
## Bringing America together.
##
## Uniting our people.
##
## And uniting our nation.
##
## I ask every American to join me in this cause.
##
## Uniting to fight the common foes we face:
##
## Anger, resentment, hatred.
##
## Extremism, lawlessness, violence.
##
## Disease, joblessness, hopelessness.
##
## With unity we can do great things. Important things.
##
## We can right wrongs.
##
## We can put people to work in good jobs.
##
## We can teach our children in safe schools.
##
## We can overcome this deadly virus.
##
## We can reward work, rebuild the middle class, and make health care
##
## secure for all.
##
## We can deliver racial justice.
##
## We can make America, once again, the leading force for good in the world.
##
## I know speaking of unity can sound to some like a foolish fantasy.
##
## I know the forces that divide us are deep and they are real.
##
## But I also know they are not new.
##
## Our history has been a constant struggle between the American ideal that we are all created equal and the harsh, ugly reality that racism, nativism, fear, and demonization have long torn us apart.
##
## The battle is perennial.
##
## Victory is never assured.
##
## Through the Civil War, the Great Depression, World War, 9/11, through struggle, sacrifice, and setbacks, our "better angels" have always prevailed.
##
## In each of these moments, enough of us came together to carry all of us forward.
##
## And, we can do so now.
##
## History, faith, and reason show the way, the way of unity.
##
## We can see each other not as adversaries but as neighbors.
##
## We can treat each other with dignity and respect.
##
## We can join forces, stop the shouting, and lower the temperature.
##
## For without unity, there is no peace, only bitterness and fury.
##
## No progress, only exhausting outrage.
##
## No nation, only a state of chaos.
##
## This is our historic moment of crisis and challenge, and unity is the path forward.
##
## And, we must meet this moment as the United States of America.
##
## If we do that, I guarantee you, we will not fail.
##
## We have never, ever, ever failed in America when we have acted together.
##
## And so today, at this time and in this place, let us start afresh.
##
## All of us.
##
## Let us listen to one another.
##
## Hear one another.
##
## See one another.
##
## Show respect to one another.
##
## Politics need not be a raging fire destroying everything in its path.
##
## Every disagreement doesn't have to be a cause for total war.
##
## And, we must reject a culture in which facts themselves are manipulated and even manufactured.
##
## My fellow Americans, we have to be different than this.
##
## America has to be better than this.
##
## And, I believe America is better than this.
##
## Just look around.
##
## Here we stand, in the shadow of a Capitol dome that was completed amid the Civil War, when the Union itself hung in the balance.
##
## Yet we endured and we prevailed.
##
## Here we stand looking out to the great Mall where Dr. King spoke of his dream.
##
## Here we stand, where 108 years ago at another inaugural, thousands of protestors tried to block brave women from marching for the right to vote.
##
## Today, we mark the swearing-in of the first woman in American history elected to national office – Vice President Kamala Harris.
##
## Don't tell me things can't change.
##
## Here we stand across the Potomac from Arlington National Cemetery, where heroes who gave the last full measure of devotion rest in eternal peace.
##
## And here we stand, just days after a riotous mob thought they could use violence to silence the will of the people, to stop the work of our democracy, and to drive us from this sacred ground.
##
## That did not happen.
##
## It will never happen.
##
## Not today.
##
## Not tomorrow.
##
## Not ever.
##
## To all those who supported our campaign I am humbled by the faith you have placed in us.
##
## To all those who did not support us, let me say this: Hear me out as we move forward. Take a measure of me and my heart.
##
## And if you still disagree, so be it.
##
## That's democracy. That's America. The right to dissent peaceably, within the guardrails of our Republic, is perhaps our nation's greatest strength.
##
## Yet hear me clearly: Disagreement must not lead to disunion.
##
## And I pledge this to you: I will be a President for all Americans.
##
## I will fight as hard for those who did not support me as for those who did.
##
## Many centuries ago, Saint Augustine, a saint of my church, wrote that a people was a multitude defined by the common objects of their love.
##
## What are the common objects we love that define us as Americans?
##
## I think I know.
##
## Opportunity.
##
## Security.
##
## Liberty.
##
## Dignity.
##
## Respect.
##
## Honor.
##
## And, yes, the truth.
##
## Recent weeks and months have taught us a painful lesson.
##
## There is truth and there are lies.
##
## Lies told for power and for profit.
##
## And each of us has a duty and responsibility, as citizens, as Americans, and especially as leaders – leaders who have pledged to honor our Constitution and protect our nation — to defend the truth and to defeat the lies.
##
## I understand that many Americans view the future with some fear and trepidation.
##
## I understand they worry about their jobs, about taking care of their families, about what comes next.
##
## I get it.
##
## But the answer is not to turn inward, to retreat into competing factions, distrusting those who don't look like you do, or worship the way you do, or don't get their news from the same sources you do.
##
## We must end this uncivil war that pits red against blue, rural versus urban, conservative versus liberal.
##
## We can do this if we open our souls instead of hardening our hearts.
##
## If we show a little tolerance and humility.
##
## If we're willing to stand in the other person's shoes just for a moment.
##
## Because here is the thing about life: There is no accounting for what fate will deal you.
##
## There are some days when we need a hand.
##
## There are other days when we're called on to lend one.
##
## That is how we must be with one another.
##
## And, if we are this way, our country will be stronger, more prosperous, more ready for the future.
##
## My fellow Americans, in the work ahead of us, we will need each other.
##
## We will need all our strength to persevere through this dark winter.
##
## We are entering what may well be the toughest and deadliest period of the virus.
##
## We must set aside the politics and finally face this pandemic as one nation.
##
## I promise you this: as the Bible says weeping may endure for a night but joy cometh in the morning.
##
## We will get through this, together
##
## The world is watching today.
##
## So here is my message to those beyond our borders: America has been tested and we have come out stronger for it.
##
## We will repair our alliances and engage with the world once again.
##
## Not to meet yesterday's challenges, but today's and tomorrow's.
##
## We will lead not merely by the example of our power but by the power of our example.
##
## We will be a strong and trusted partner for peace, progress, and security.
##
## We have been through so much in this nation.
##
## And, in my first act as President, I would like to ask you to join me in a moment of silent prayer to remember all those we lost this past year to the pandemic.
##
## To those 400,000 fellow Americans – mothers and fathers, husbands and wives, sons and daughters, friends, neighbors, and co-workers.
##
## We will honor them by becoming the people and nation we know we can and should be.
##
## Let us say a silent prayer for those who lost their lives, for those they left behind, and for our country.
##
## Amen.
##
## This is a time of testing.
##
## We face an attack on democracy and on truth.
##
## A raging virus.
##
## Growing inequity.
##
## The sting of systemic racism.
##
## A climate in crisis.
##
## America's role in the world.
##
## Any one of these would be enough to challenge us in profound ways.
##
## But the fact is we face them all at once, presenting this nation with the gravest of responsibilities.
##
## Now we must step up.
##
## All of us.
##
## It is a time for boldness, for there is so much to do.
##
## And, this is certain.
##
## We will be judged, you and I, for how we resolve the cascading crises of our era.
##
## Will we rise to the occasion?
##
## Will we master this rare and difficult hour?
##
## Will we meet our obligations and pass along a new and better world for our children?
##
## I believe we must and I believe we will.
##
## And when we do, we will write the next chapter in the American story.
##
## It's a story that might sound something like a song that means a lot to me.
##
## It's called "American Anthem" and there is one verse stands out for me:
##
## "The work and prayers
##
## of centuries have brought us to this day
##
## What shall be our legacy?
##
## What will our children say?…
##
## Let me know in my heart
##
## When my days are through
##
## America
##
## America
##
## I gave my best to you."
##
## Let us add our own work and prayers to the unfolding story of our nation.
##
## If we do this then when our days are through our children and our children's children will say of us they gave their best.
##
## They did their duty.
##
## They healed a broken land.
##
## My fellow Americans, I close today where I began, with a sacred oath.
##
## Before God and all of you I give you my word.
##
## I will always level with you.
##
## I will defend the Constitution.
##
## I will defend our democracy.
##
## I will defend America.
##
## I will give my all in your service thinking not of power, but of possibilities.
##
## Not of personal interest, but of the public good.
##
## And together, we shall write an American story of hope, not fear.
##
## Of unity, not division.
##
## Of light, not darkness.
##
## An American story of decency and dignity.
##
## Of love and of healing.
##
## Of greatness and of goodness.
##
## May this be the story that guides us.
##
## The story that inspires us.
##
## The story that tells ages yet to come that we answered the call of history.
##
## We met the moment.
##
## That democracy and hope, truth and justice, did not die on our watch but thrived.
##
## That our America secured liberty at home and stood once again as a beacon to the world.
##
## That is what we owe our forebearers, one another, and generations to follow.
##
## So, with purpose and resolve we turn to the tasks of our time.
##
## Sustained by faith.
##
## Driven by conviction.
##
## And, devoted to one another and to this country we love with all our hearts.
##
## May God bless America and may God protect our troops.
##
## Thank you, America.
## subsetting by name
# select Washington's 1789 speech to compare
# (a character index picks the element of txt with that name; cat() prints
# the raw text so that "\n" shows up as real line breaks)
cat(txt['1789-Washington'])
## Fellow-Citizens of the Senate and of the House of Representatives:
##
## Among the vicissitudes incident to life no event could have filled me with greater anxieties than that of which the notification was transmitted by your order, and received on the 14th day of the present month. On the one hand, I was summoned by my Country, whose voice I can never hear but with veneration and love, from a retreat which I had chosen with the fondest predilection, and, in my flattering hopes, with an immutable decision, as the asylum of my declining years - a retreat which was rendered every day more necessary as well as more dear to me by the addition of habit to inclination, and of frequent interruptions in my health to the gradual waste committed on it by time. On the other hand, the magnitude and difficulty of the trust to which the voice of my country called me, being sufficient to awaken in the wisest and most experienced of her citizens a distrustful scrutiny into his qualifications, could not but overwhelm with despondence one who (inheriting inferior endowments from nature and unpracticed in the duties of civil administration) ought to be peculiarly conscious of his own deficiencies. In this conflict of emotions all I dare aver is that it has been my faithful study to collect my duty from a just appreciation of every circumstance by which it might be affected. All I dare hope is that if, in executing this task, I have been too much swayed by a grateful remembrance of former instances, or by an affectionate sensibility to this transcendent proof of the confidence of my fellow citizens, and have thence too little consulted my incapacity as well as disinclination for the weighty and untried cares before me, my error will be palliated by the motives which mislead me, and its consequences be judged by my country with some share of the partiality in which they originated.
##
## Such being the impressions under which I have, in obedience to the public summons, repaired to the present station, it would be peculiarly improper to omit in this first official act my fervent supplications to that Almighty Being who rules over the universe, who presides in the councils of nations, and whose providential aids can supply every human defect, that His benediction may consecrate to the liberties and happiness of the people of the United States a Government instituted by themselves for these essential purposes, and may enable every instrument employed in its administration to execute with success the functions allotted to his charge. In tendering this homage to the Great Author of every public and private good, I assure myself that it expresses your sentiments not less than my own, nor those of my fellow citizens at large less than either. No people can be bound to acknowledge and adore the Invisible Hand which conducts the affairs of men more than those of the United States. Every step by which they have advanced to the character of an independent nation seems to have been distinguished by some token of providential agency; and in the important revolution just accomplished in the system of their united government the tranquil deliberations and voluntary consent of so many distinct communities from which the event has resulted can not be compared with the means by which most governments have been established without some return of pious gratitude, along with an humble anticipation of the future blessings which the past seem to presage. These reflections, arising out of the present crisis, have forced themselves too strongly on my mind to be suppressed. You will join with me, I trust, in thinking that there are none under the influence of which the proceedings of a new and free government can more auspiciously commence.
##
## By the article establishing the executive department it is made the duty of the President "to recommend to your consideration such measures as he shall judge necessary and expedient." The circumstances under which I now meet you will acquit me from entering into that subject further than to refer to the great constitutional charter under which you are assembled, and which, in defining your powers, designates the objects to which your attention is to be given. It will be more consistent with those circumstances, and far more congenial with the feelings which actuate me, to substitute, in place of a recommendation of particular measures, the tribute that is due to the talents, the rectitude, and the patriotism which adorn the characters selected to devise and adopt them. In these honorable qualifications I behold the surest pledges that as on one side no local prejudices or attachments, no separate views nor party animosities, will misdirect the comprehensive and equal eye which ought to watch over this great assemblage of communities and interests, so, on another, that the foundation of our national policy will be laid in the pure and immutable principles of private morality, and the preeminence of free government be exemplified by all the attributes which can win the affections of its citizens and command the respect of the world. 
## I dwell on this prospect with every satisfaction which an ardent love for my country can inspire, since there is no truth more thoroughly established than that there exists in the economy and course of nature an indissoluble union between virtue and happiness; between duty and advantage; between the genuine maxims of an honest and magnanimous policy and the solid rewards of public prosperity and felicity; since we ought to be no less persuaded that the propitious smiles of Heaven can never be expected on a nation that disregards the eternal rules of order and right which Heaven itself has ordained; and since the preservation of the sacred fire of liberty and the destiny of the republican model of government are justly considered, perhaps, as deeply, as finally, staked on the experiment entrusted to the hands of the American people.
##
## Besides the ordinary objects submitted to your care, it will remain with your judgment to decide how far an exercise of the occasional power delegated by the fifth article of the Constitution is rendered expedient at the present juncture by the nature of objections which have been urged against the system, or by the degree of inquietude which has given birth to them. Instead of undertaking particular recommendations on this subject, in which I could be guided by no lights derived from official opportunities, I shall again give way to my entire confidence in your discernment and pursuit of the public good; for I assure myself that whilst you carefully avoid every alteration which might endanger the benefits of an united and effective government, or which ought to await the future lessons of experience, a reverence for the characteristic rights of freemen and a regard for the public harmony will sufficiently influence your deliberations on the question how far the former can be impregnably fortified or the latter be safely and advantageously promoted.
##
## To the foregoing observations I have one to add, which will be most properly addressed to the House of Representatives. It concerns myself, and will therefore be as brief as possible. When I was first honored with a call into the service of my country, then on the eve of an arduous struggle for its liberties, the light in which I contemplated my duty required that I should renounce every pecuniary compensation. From this resolution I have in no instance departed; and being still under the impressions which produced it, I must decline as inapplicable to myself any share in the personal emoluments which may be indispensably included in a permanent provision for the executive department, and must accordingly pray that the pecuniary estimates for the station in which I am placed may during my continuance in it be limited to such actual expenditures as the public good may be thought to require.
##
## Having thus imparted to you my sentiments as they have been awakened by the occasion which brings us together, I shall take my present leave; but not without resorting once more to the benign Parent of the Human Race in humble supplication that, since He has been pleased to favor the American people with opportunities for deliberating in perfect tranquillity, and dispositions for deciding with unparalleled unanimity on a form of government for the security of their union and the advancement of their happiness, so His divine blessing may be equally conspicuous in the enlarged views, the temperate consultations, and the wise measures on which the success of this Government must depend.
Extract both the Trump and the Biden speeches at the same time. Hint: | means OR in regex functions such as grepl.
## keep every document whose name matches either alternative of the pattern
l <- txt[grepl(pattern = 'Biden|Trump', x = names(txt))]
## show the selected texts (printed as quoted strings, so "\n" stays escaped)
l
## 2017-Trump
## "Chief Justice Roberts, President Carter, President Clinton, President Bush, President Obama, fellow Americans, and people of the world: thank you.\n\nWe, the citizens of America, are now joined in a great national effort to rebuild our country and restore its promise for all of our people.\n\nTogether, we will determine the course of America and the world for many, many years to come.\n\nWe will face challenges. We will confront hardships. But we will get the job done.\n\nEvery four years, we gather on these steps to carry out the orderly and peaceful transfer of power, and we are grateful to President Obama and First Lady Michelle Obama for their gracious aid throughout this transition. They have been magnificent. Thank you.\n\nToday's ceremony, however, has very special meaning. Because today we are not merely transferring power from one Administration to another, or from one party to another - but we are transferring power from Washington DC and giving it back to you, the people.\n\nFor too long, a small group in our nation's Capital has reaped the rewards of government while the people have borne the cost.\n\nWashington flourished - but the people did not share in its wealth.\n\nPoliticians prospered - but the jobs left, and the factories closed.\n\nThe establishment protected itself, but not the citizens of our country.\n\nTheir victories have not been your victories; their triumphs have not been your triumphs; and while they celebrated in our nation's capital, there was little to celebrate for struggling families all across our land.\n\nThat all changes - starting right here, and right now, because this moment is your moment: it belongs to you.\n\nIt belongs to everyone gathered here today and everyone watching all across America.\n\nThis is your day. 
This is your celebration.\n\nAnd this, the United States of America, is your country.\n\nWhat truly matters is not which party controls our government, but whether our government is controlled by the people.\n\nJanuary 20, 2017, will be remembered as the day the people became the rulers of this nation again.\n\nThe forgotten men and women of our country will be forgotten no longer.\n\nEveryone is listening to you now.\n\nYou came by the tens of millions to become part of a historic movement the likes of which the world has never seen before.\n\nAt the center of this movement is a crucial conviction: that a nation exists to serve its citizens.\n\nAmericans want great schools for their children, safe neighborhoods for their families, and good jobs for themselves.\n\nThese are just and reasonable demands of righteous people and a righteous public.\n\nBut for too many of our citizens, a different reality exists: mothers and children trapped in poverty in our inner cities; rusted-out factories scattered like tombstones across the landscape of our nation; an education system, flush with cash, but which leaves our young and beautiful students deprived of all knowledge; and the crime and the gangs and the drugs that have stolen too many lives and robbed our country of so much unrealized potential.\n\nThis American carnage stops right here and stops right now.\n\nWe are one nation - and their pain is our pain. Their dreams are our dreams; and their success will be our success. 
We share one heart, one home, and one glorious destiny.\n\nThe oath of office I take today is an oath of allegiance to all Americans.\n\nFor many decades, we've enriched foreign industry at the expense of American industry; subsidized the armies of other countries while allowing for the very sad depletion of our military; we've defended other nations' borders while refusing to defend our own; and spent trillions and trillions of dollars overseas while America's infrastructure has fallen into disrepair and decay.\n\nWe've made other countries rich while the wealth, strength, and confidence of our country has dissipated over the horizon.\n\nOne by one, the factories shuttered and left our shores, with not even a thought about the millions and millions of American workers that were left behind.\n\nThe wealth of our middle class has been ripped from their homes and then redistributed all across the world.\n\nBut that is the past. And now we are looking only to the future.\n\nWe assembled here today are issuing a new decree to be heard in every city, in every foreign capital, and in every hall of power.\n\nFrom this day forward, a new vision will govern our land.\n\nFrom this day forward, it's going to be only America first, America first.\n\nEvery decision on trade, on taxes, on immigration, on foreign affairs, will be made to benefit American workers and American families.\n\nWe must protect our borders from the ravages of other countries making our products, stealing our companies, and destroying our jobs. Protection will lead to great prosperity and strength.\n\nI will fight for you with every breath in my body - and I will never, ever let you down.\n\nAmerica will start winning again, winning like never before.\n\nWe will bring back our jobs. We will bring back our borders. We will bring back our wealth. 
And we will bring back our dreams.\n\nWe will build new roads, and highways, and bridges, and airports, and tunnels, and railways all across our wonderful nation.\n\nWe will get our people off of welfare and back to work - rebuilding our country with American hands and American labor.\n\nWe will follow two simple rules: buy American and hire American.\n\nWe will seek friendship and goodwill with the nations of the world - but we do so with the understanding that it is the right of all nations to put their own interests first.\n\nWe do not seek to impose our way of life on anyone, but rather to let it shine as an example for everyone to follow.\n\nWe will reinforce old alliances and form new ones - and unite the civilized world against radical Islamic terrorism, which we will eradicate from the face of the Earth.\n\nAt the bedrock of our politics will be a total allegiance to the United States of America, and through our loyalty to our country, we will rediscover our loyalty to each other.\n\nWhen you open your heart to patriotism, there is no room for prejudice.\n\nThe Bible tells us: \"How good and pleasant it is when God's people live together in unity.\"\n\nWe must speak our minds openly, debate our disagreements honestly, but always pursue solidarity.\n\nWhen America is united, America is totally unstoppable.\n\nThere should be no fear - we are protected, and we will always be protected.\n\nWe will be protected by the great men and women of our military and law enforcement and, most importantly, we are protected by God.\n\nFinally, we must think big and dream even bigger.\n\nIn America, we understand that a nation is only living as long as it is striving.\n\nWe will no longer accept politicians who are all talk and no action - constantly complaining but never doing anything about it.\n\nThe time for empty talk is over.\n\nNow arrives the hour of action.\n\nDo not let anyone tell you it cannot be done. 
No challenge can match the heart and fight and spirit of America.\n\nWe will not fail. Our country will thrive and prosper again.\n\nWe stand at the birth of a new millennium, ready to unlock the mysteries of space, to free the Earth from the miseries of disease, and to harness the energies, industries and technologies of tomorrow.\n\nA new national pride will stir ourselves, lift our sights, and heal our divisions.\n\nIt is time to remember that old wisdom our soldiers will never forget: that whether we are black or brown or white, we all bleed the same red blood of patriots, we all enjoy the same glorious freedoms, and we all salute the same great American Flag.\n\nAnd whether a child is born in the urban sprawl of Detroit or the windswept plains of Nebraska, they look up at the same night sky, they fill their heart with the same dreams, and they are infused with the breath of life by the same almighty Creator.\n\nSo to all Americans, in every city near and far, small and large, from mountain to mountain, and from ocean to ocean, hear these words:\n\nYou will never be ignored again.\n\nYour voice, your hopes, and your dreams, will define our American destiny. And your courage and goodness and love will forever guide us along the way.\n\nTogether, we will make America strong again.\n\nWe will make America wealthy again.\n\nWe will make America proud again.\n\nWe will make America safe again.\n\nAnd, yes, together, we will make America great again. Thank you, God bless you, and God bless America."
## 2021-Biden
## "Chief Justice Roberts, Vice President Harris, Speaker Pelosi, Leader Schumer, Leader McConnell, Vice President Pence, distinguished guests, and my fellow Americans.\n\nThis is America's day.\n\nThis is democracy's day.\n\nA day of history and hope.\n\nOf renewal and resolve.\n\nThrough a crucible for the ages America has been tested anew and America has risen to the challenge.\n\nToday, we celebrate the triumph not of a candidate, but of a cause, the cause of democracy.\n\nThe will of the people has been heard and the will of the people has been heeded.\n\nWe have learned again that democracy is precious.\n\nDemocracy is fragile.\n\nAnd at this hour, my friends, democracy has prevailed.\n\nSo now, on this hallowed ground where just days ago violence sought to shake this Capitol's very foundation, we come together as one nation, under God, indivisible, to carry out the peaceful transfer of power as we have for more than two centuries.\n\nWe look ahead in our uniquely American way – restless, bold, optimistic – and set our sights on the nation we know we can be and we must be.\n\nI thank my predecessors of both parties for their presence here.\n\nI thank them from the bottom of my heart.\n\nYou know the resilience of our Constitution and the strength of our nation.\n\nAs does President Carter, who I spoke to last night but who cannot be with us today, but whom we salute for his lifetime of service.\n\nI have just taken the sacred oath each of these patriots took — an oath first sworn by George Washington.\n\nBut the American story depends not on any one of us, not on some of us, but on all of us.\n\nOn \"We the People\" who seek a more perfect Union.\n\nThis is a great nation and we are a good people.\n\nOver the centuries through storm and strife, in peace and in war, we have come so far. 
But we still have far to go.\n\nWe will press forward with speed and urgency, for we have much to do in this winter of peril and possibility.\n\nMuch to repair.\n\nMuch to restore.\n\nMuch to heal.\n\nMuch to build.\n\nAnd much to gain.\n\nFew periods in our nation's history have been more challenging or difficult than the one we're in now.\n\nA once-in-a-century virus silently stalks the country.\n\nIt's taken as many lives in one year as America lost in all of World War II.\n\nMillions of jobs have been lost.\n\nHundreds of thousands of businesses closed.\n\nA cry for racial justice some 400 years in the making moves us. The dream of justice for all will be deferred no longer.\n\nA cry for survival comes from the planet itself. A cry that can't be any more desperate or any more clear.\n\nAnd now, a rise in political extremism, white supremacy, domestic terrorism that we must confront and we will defeat.\n\nTo overcome these challenges – to restore the soul and to secure the future of America – requires more than words.\n\nIt requires that most elusive of things in a democracy:\n\nUnity.\n\nUnity.\n\nIn another January in Washington, on New Year's Day 1863, Abraham Lincoln signed the Emancipation Proclamation.\n\nWhen he put pen to paper, the President said, \"If my name ever goes down into history it will be for this act and my whole soul is in it.\"\n\nMy whole soul is in it.\n\nToday, on this January day, my whole soul is in this:\n\nBringing America together.\n\nUniting our people.\n\nAnd uniting our nation.\n\nI ask every American to join me in this cause.\n\nUniting to fight the common foes we face:\n\nAnger, resentment, hatred.\n\nExtremism, lawlessness, violence.\n\nDisease, joblessness, hopelessness.\n\nWith unity we can do great things. 
Important things.\n\nWe can right wrongs.\n\nWe can put people to work in good jobs.\n\nWe can teach our children in safe schools.\n\nWe can overcome this deadly virus.\n\nWe can reward work, rebuild the middle class, and make health care\n\nsecure for all.\n\nWe can deliver racial justice.\n\nWe can make America, once again, the leading force for good in the world.\n\nI know speaking of unity can sound to some like a foolish fantasy.\n\nI know the forces that divide us are deep and they are real.\n\nBut I also know they are not new.\n\nOur history has been a constant struggle between the American ideal that we are all created equal and the harsh, ugly reality that racism, nativism, fear, and demonization have long torn us apart.\n\nThe battle is perennial.\n\nVictory is never assured.\n\nThrough the Civil War, the Great Depression, World War, 9/11, through struggle, sacrifice, and setbacks, our \"better angels\" have always prevailed.\n\nIn each of these moments, enough of us came together to carry all of us forward.\n\nAnd, we can do so now.\n\nHistory, faith, and reason show the way, the way of unity.\n\nWe can see each other not as adversaries but as neighbors.\n\nWe can treat each other with dignity and respect.\n\nWe can join forces, stop the shouting, and lower the temperature.\n\nFor without unity, there is no peace, only bitterness and fury.\n\nNo progress, only exhausting outrage.\n\nNo nation, only a state of chaos.\n\nThis is our historic moment of crisis and challenge, and unity is the path forward.\n\nAnd, we must meet this moment as the United States of America.\n\nIf we do that, I guarantee you, we will not fail.\n\nWe have never, ever, ever failed in America when we have acted together.\n\nAnd so today, at this time and in this place, let us start afresh.\n\nAll of us.\n\nLet us listen to one another.\n\nHear one another.\n\nSee one another.\n\nShow respect to one another.\n\nPolitics need not be a raging fire destroying everything in its 
path.\n\nEvery disagreement doesn't have to be a cause for total war.\n\nAnd, we must reject a culture in which facts themselves are manipulated and even manufactured.\n\nMy fellow Americans, we have to be different than this.\n\nAmerica has to be better than this.\n\nAnd, I believe America is better than this.\n\nJust look around.\n\nHere we stand, in the shadow of a Capitol dome that was completed amid the Civil War, when the Union itself hung in the balance.\n\nYet we endured and we prevailed.\n\nHere we stand looking out to the great Mall where Dr. King spoke of his dream.\n\nHere we stand, where 108 years ago at another inaugural, thousands of protestors tried to block brave women from marching for the right to vote.\n\nToday, we mark the swearing-in of the first woman in American history elected to national office – Vice President Kamala Harris.\n\nDon't tell me things can't change.\n\nHere we stand across the Potomac from Arlington National Cemetery, where heroes who gave the last full measure of devotion rest in eternal peace.\n\nAnd here we stand, just days after a riotous mob thought they could use violence to silence the will of the people, to stop the work of our democracy, and to drive us from this sacred ground.\n\nThat did not happen.\n\nIt will never happen.\n\nNot today.\n\nNot tomorrow.\n\nNot ever.\n\nTo all those who supported our campaign I am humbled by the faith you have placed in us.\n\nTo all those who did not support us, let me say this: Hear me out as we move forward. Take a measure of me and my heart.\n\nAnd if you still disagree, so be it.\n\nThat's democracy. That's America. 
The right to dissent peaceably, within the guardrails of our Republic, is perhaps our nation's greatest strength.\n\nYet hear me clearly: Disagreement must not lead to disunion.\n\nAnd I pledge this to you: I will be a President for all Americans.\n\nI will fight as hard for those who did not support me as for those who did.\n\nMany centuries ago, Saint Augustine, a saint of my church, wrote that a people was a multitude defined by the common objects of their love.\n\nWhat are the common objects we love that define us as Americans?\n\nI think I know.\n\nOpportunity.\n\nSecurity.\n\nLiberty.\n\nDignity.\n\nRespect.\n\nHonor.\n\nAnd, yes, the truth.\n\nRecent weeks and months have taught us a painful lesson.\n\nThere is truth and there are lies.\n\nLies told for power and for profit.\n\nAnd each of us has a duty and responsibility, as citizens, as Americans, and especially as leaders – leaders who have pledged to honor our Constitution and protect our nation — to defend the truth and to defeat the lies.\n\nI understand that many Americans view the future with some fear and trepidation.\n\nI understand they worry about their jobs, about taking care of their families, about what comes next.\n\nI get it.\n\nBut the answer is not to turn inward, to retreat into competing factions, distrusting those who don't look like you do, or worship the way you do, or don't get their news from the same sources you do.\n\nWe must end this uncivil war that pits red against blue, rural versus urban, conservative versus liberal.\n\nWe can do this if we open our souls instead of hardening our hearts.\n\nIf we show a little tolerance and humility.\n\nIf we're willing to stand in the other person's shoes just for a moment.\n\nBecause here is the thing about life: There is no accounting for what fate will deal you.\n\nThere are some days when we need a hand.\n\nThere are other days when we're called on to lend one.\n\nThat is how we must be with one another.\n\nAnd, if we are this way, our 
country will be stronger, more prosperous, more ready for the future.\n\nMy fellow Americans, in the work ahead of us, we will need each other.\n\nWe will need all our strength to persevere through this dark winter.\n\nWe are entering what may well be the toughest and deadliest period of the virus.\n\nWe must set aside the politics and finally face this pandemic as one nation.\n\nI promise you this: as the Bible says weeping may endure for a night but joy cometh in the morning.\n\nWe will get through this, together\n\nThe world is watching today.\n\nSo here is my message to those beyond our borders: America has been tested and we have come out stronger for it.\n\nWe will repair our alliances and engage with the world once again.\n\nNot to meet yesterday's challenges, but today's and tomorrow's.\n\nWe will lead not merely by the example of our power but by the power of our example.\n\nWe will be a strong and trusted partner for peace, progress, and security.\n\nWe have been through so much in this nation.\n\nAnd, in my first act as President, I would like to ask you to join me in a moment of silent prayer to remember all those we lost this past year to the pandemic.\n\nTo those 400,000 fellow Americans – mothers and fathers, husbands and wives, sons and daughters, friends, neighbors, and co-workers.\n\nWe will honor them by becoming the people and nation we know we can and should be.\n\nLet us say a silent prayer for those who lost their lives, for those they left behind, and for our country.\n\nAmen.\n\nThis is a time of testing.\n\nWe face an attack on democracy and on truth.\n\nA raging virus.\n\nGrowing inequity.\n\nThe sting of systemic racism.\n\nA climate in crisis.\n\nAmerica's role in the world.\n\nAny one of these would be enough to challenge us in profound ways.\n\nBut the fact is we face them all at once, presenting this nation with the gravest of responsibilities.\n\nNow we must step up.\n\nAll of us.\n\nIt is a time for boldness, for there is so much 
to do.\n\nAnd, this is certain.\n\nWe will be judged, you and I, for how we resolve the cascading crises of our era.\n\nWill we rise to the occasion?\n\nWill we master this rare and difficult hour?\n\nWill we meet our obligations and pass along a new and better world for our children?\n\nI believe we must and I believe we will.\n\nAnd when we do, we will write the next chapter in the American story.\n\nIt's a story that might sound something like a song that means a lot to me.\n\nIt's called \"American Anthem\" and there is one verse stands out for me:\n\n\"The work and prayers\n\nof centuries have brought us to this day\n\nWhat shall be our legacy?\n\nWhat will our children say?…\n\nLet me know in my heart\n\nWhen my days are through\n\nAmerica\n\nAmerica\n\nI gave my best to you.\"\n\nLet us add our own work and prayers to the unfolding story of our nation.\n\nIf we do this then when our days are through our children and our children's children will say of us they gave their best.\n\nThey did their duty.\n\nThey healed a broken land.\n\nMy fellow Americans, I close today where I began, with a sacred oath.\n\nBefore God and all of you I give you my word.\n\nI will always level with you.\n\nI will defend the Constitution.\n\nI will defend our democracy.\n\nI will defend America.\n\nI will give my all in your service thinking not of power, but of possibilities.\n\nNot of personal interest, but of the public good.\n\nAnd together, we shall write an American story of hope, not fear.\n\nOf unity, not division.\n\nOf light, not darkness.\n\nAn American story of decency and dignity.\n\nOf love and of healing.\n\nOf greatness and of goodness.\n\nMay this be the story that guides us.\n\nThe story that inspires us.\n\nThe story that tells ages yet to come that we answered the call of history.\n\nWe met the moment.\n\nThat democracy and hope, truth and justice, did not die on our watch but thrived.\n\nThat our America secured liberty at home and stood once again as a beacon 
to the world.\n\nThat is what we owe our forebearers, one another, and generations to follow.\n\nSo, with purpose and resolve we turn to the tasks of our time.\n\nSustained by faith.\n\nDriven by conviction.\n\nAnd, devoted to one another and to this country we love with all our hearts.\n\nMay God bless America and may God protect our troops.\n\nThank you, America."
str(l)## Named chr [1:2] "Chief Justice Roberts, President Carter, President Clinton, President Bush, President Obama, fellow Americans, "| __truncated__ "Chief Justice Roberts, Vice President Harris, Speaker Pelosi, Leader Schumer, Leader McConnell, Vice President "| __truncated__
## - attr(*, "names")= chr [1:2] "2017-Trump" "2021-Biden"
## word tokenization
## open the help pages of the two functions used in this section
?tokens
?dfm
## split every document of the corpus into word tokens, dropping punctuation
## and symbols; padding = FALSE means removed tokens leave no empty
## placeholders behind
## (TRUE/FALSE are spelled out because T and F are ordinary, reassignable names)
toks <- tokens(df, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
toks## Tokens consisting of 59 documents and 4 docvars.
## 1789-Washington :
## [1] "Fellow-Citizens" "of" "the" "Senate" "and" "of" "the" "House" "of" "Representatives" "Among" "the"
## [ ... and 1,418 more ]
##
## 1793-Washington :
## [1] "Fellow" "citizens" "I" "am" "again" "called" "upon" "by" "the" "voice" "of" "my"
## [ ... and 123 more ]
##
## 1797-Adams :
## [1] "When" "it" "was" "first" "perceived" "in" "early" "times" "that" "no" "middle" "course"
## [ ... and 2,306 more ]
##
## 1801-Jefferson :
## [1] "Friends" "and" "Fellow" "Citizens" "Called" "upon" "to" "undertake" "the" "duties" "of" "the"
## [ ... and 1,714 more ]
##
## 1805-Jefferson :
## [1] "Proceeding" "fellow" "citizens" "to" "that" "qualification" "which" "the" "Constitution" "requires" "before" "my"
## [ ... and 2,154 more ]
##
## 1809-Madison :
## [1] "Unwilling" "to" "depart" "from" "examples" "of" "the" "most" "revered" "authority" "I" "avail"
## [ ... and 1,163 more ]
##
## [ reached max_ndoc ... 53 more documents ]
## document-term matrix (quanteda calls it a document-feature matrix):
## one row per document, one column per token type, cells hold counts
dfx <- toks %>% dfm()
dfx## Document-feature matrix of: 59 documents, 9,422 features (91.89% sparse) and 4 docvars.
## features
## docs fellow-citizens of the senate and house representatives among vicissitudes incident
## 1789-Washington 1 71 116 1 48 2 2 1 1 1
## 1793-Washington 0 11 13 0 2 0 0 0 0 0
## 1797-Adams 3 140 163 1 130 0 2 4 0 0
## 1801-Jefferson 2 104 130 0 81 0 0 1 0 0
## 1805-Jefferson 0 101 143 0 93 0 0 7 0 0
## 1809-Madison 1 69 104 0 43 0 0 0 0 0
## [ reached max_ndoc ... 53 more documents, reached max_nfeat ... 9,412 more features ]
Remove numbers (part of the noise) from the tokens. Hint: Check out the arguments of the tokens() function.
## re-tokenize the corpus, this time also dropping numbers, URLs and separators
## (TRUE/FALSE are spelled out because T and F are ordinary, reassignable names)
toks <- tokens(
  df,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  padding = FALSE,
  remove_numbers = TRUE,
  remove_url = TRUE,
  remove_separators = TRUE
)
## rebuild the document-feature matrix from the cleaned tokens
dfx <- dfm(toks)
## top 10 features for every document
topfeatures(dfx, n = 10, groups = docnames(dfx))## $`1789-Washington`
## the of and to which in i be my by
## 116 71 48 48 36 31 23 23 22 20
##
## $`1793-Washington`
## the of i to in shall and by my it
## 13 11 6 5 3 3 2 2 2 2
##
## $`1797-Adams`
## the of and to a in it be by if
## 163 140 130 72 51 47 34 31 30 25
##
## $`1801-Jefferson`
## the of and to which that in our i a
## 130 104 81 61 25 24 24 24 21 21
##
## $`1805-Jefferson`
## the of and to that in with their them have
## 143 101 93 83 37 35 28 28 27 24
##
## $`1809-Madison`
## the of to and in a as which by i
## 104 69 61 43 34 19 15 14 11 11
##
## $`1813-Madison`
## the of and to a on our in it which
## 100 65 44 42 25 22 22 21 18 16
##
## $`1817-Monroe`
## the of to and in our a it be is
## 275 164 126 122 79 65 61 57 50 41
##
## $`1821-Monroe`
## the of to and in a which it be our
## 360 197 146 141 136 76 66 64 64 60
##
## $`1825-Adams`
## the of and to in by have that our been
## 304 245 116 101 62 38 36 36 36 29
##
## $`1829-Jackson`
## the of to and in that our a be their
## 92 71 53 49 24 21 18 16 16 16
##
## $`1833-Jackson`
## the of and to in our my a which all
## 101 76 53 46 23 19 18 15 14 14
##
## $`1837-VanBuren`
## the of and to in that our a it i
## 252 198 150 139 76 60 60 59 42 39
##
## $`1841-Harrison`
## the of to and in that a it which be
## 829 604 318 231 173 132 132 111 107 106
##
## $`1845-Polk`
## the of and to our in be a it that
## 397 298 189 184 101 87 76 65 54 47
##
## $`1849-Taylor`
## the of to and in i by be shall our
## 99 62 61 52 20 18 17 16 15 15
##
## $`1853-Pierce`
## the of and to a in be that which it
## 230 169 130 107 62 60 57 46 41 34
##
## $`1857-Buchanan`
## the of to and in a this our it is
## 238 139 105 97 61 58 39 35 32 32
##
## $`1861-Lincoln`
## the of to and in be that it a is
## 256 146 134 105 77 76 59 59 56 49
##
## $`1865-Lincoln`
## the to and of it that war all which in
## 58 27 24 22 13 12 12 10 9 9
##
## $`1869-Grant`
## the to of and in be i a it will
## 83 57 47 27 27 25 19 19 16 16
##
## $`1873-Grant`
## the of and to in i my a that be
## 106 72 50 49 26 25 21 21 20 19
##
## $`1877-Hayes`
## the of and to in a that be by as
## 240 166 102 88 63 41 39 32 26 26
##
## $`1881-Garfield`
## the of and to in is that a it our
## 317 181 119 80 49 37 35 35 35 35
##
## $`1885-Cleveland`
## the of and to in a our their is be
## 174 117 103 57 31 30 26 22 19 18
##
## $`1889-Harrison`
## the of and to in our that a not be
## 360 240 192 133 80 76 66 65 46 45
##
## $`1893-Cleveland`
## the of and to our in be which that by
## 156 119 102 79 46 36 25 23 21 21
##
## $`1897-McKinley`
## the of and to in be our a it is
## 345 228 171 113 81 65 60 57 56 46
##
## $`1901-McKinley`
## the of and to in we be it our for
## 200 110 97 65 42 28 27 26 25 23
##
## $`1905-Roosevelt`
## the of and we to in our a which have
## 65 45 38 32 28 23 22 20 16 15
##
## $`1909-Taft`
## the of and to in a be is as it
## 486 314 220 218 140 109 79 62 58 56
##
## $`1913-Wilson`
## the of and to we our in it a have
## 109 87 78 49 40 30 29 29 27 25
##
## $`1917-Wilson`
## the and of we to in our that have be
## 94 77 76 47 46 36 33 29 27 22
##
## $`1921-Harding`
## the of and to we our in for a is
## 200 159 152 104 80 68 63 52 47 47
##
## $`1925-Coolidge`
## the of and to we a in that is not
## 261 207 146 135 88 77 71 65 65 61
##
## $`1929-Hoover`
## the of and to in our a is for that
## 288 250 122 100 83 75 49 48 44 39
##
## $`1933-Roosevelt`
## the of and to in a that our we it
## 130 109 58 50 44 38 32 29 26 25
##
## $`1937-Roosevelt`
## of the to and we a that our in have
## 106 106 56 53 47 39 33 33 29 21
##
## $`1941-Roosevelt`
## the of and to in we a it is that
## 114 81 47 36 35 32 31 28 24 23
##
## $`1945-Roosevelt`
## the we of and to that our a in it
## 27 26 25 21 16 14 14 13 11 7
##
## $`1949-Truman`
## the and of to we in that a our for
## 141 100 96 81 59 56 37 36 32 30
##
## $`1953-Eisenhower`
## the of and to we in our that this a
## 171 142 101 81 66 65 58 40 37 33
##
## $`1957-Eisenhower`
## the of and we to in our all a is
## 114 96 64 51 44 43 38 26 25 20
##
## $`1961-Kennedy`
## the of to and we a in our that not
## 86 65 43 41 30 29 26 21 20 19
##
## $`1965-Johnson`
## the and of to in we a our that is
## 77 65 57 37 36 34 33 32 27 27
##
## $`1969-Nixon`
## the of to we in our that and as a
## 136 94 69 65 61 47 42 39 34 31
##
## $`1973-Nixon`
## the of to in and we a that for our
## 83 68 65 58 50 47 35 33 32 32
##
## $`1977-Carter`
## the and to we our of a for that in
## 53 48 44 43 35 33 29 24 23 22
##
## $`1981-Reagan`
## the and of to we our a in that will
## 122 92 90 80 57 56 46 45 34 33
##
## $`1985-Reagan`
## the and of to we a our in for is
## 130 110 95 73 68 59 55 46 35 33
##
## $`1989-Bush`
## the and a to of we is our in are
## 121 98 73 63 61 60 49 44 38 36
##
## $`1993-Clinton`
## the and our we to of in is for world
## 89 66 57 52 49 46 31 28 20 18
##
## $`1997-Clinton`
## the of and to our a we in new that
## 133 96 94 64 63 59 42 35 29 27
##
## $`2001-Bush`
## and of the our we a to in is not
## 82 58 53 50 47 46 45 31 31 27
##
## $`2005-Bush`
## the of and in our to we is that a
## 142 116 108 51 50 38 37 30 28 27
##
## $`2009-Obama`
## the and of to our we that a is in
## 135 111 82 70 67 62 49 47 36 25
##
## $`2013-Obama`
## the and our of we to that a for is
## 104 89 76 69 68 66 55 37 28 25
##
## $`2017-Trump`
## and the of our we will to is america a
## 77 71 48 47 46 40 36 21 18 15
##
## $`2021-Biden`
## the and we of to a our in this i
## 101 96 88 77 65 46 43 42 39 33
## ugh, not very informative...## let's remove stopwords before creating a document-term matrix
## this is done during tokenization
stopwords('en')## [1] "i" "me" "my" "myself" "we" "our" "ours" "ourselves" "you" "your" "yours" "yourself" "yourselves" "he" "him" "his" "himself" "she" "her" "hers" "herself" "it" "its" "itself" "they" "them" "their" "theirs" "themselves" "what" "which" "who" "whom" "this" "that" "these" "those" "am" "is" "are" "was" "were" "be" "been" "being" "have" "has" "had" "having" "do" "does" "did" "doing" "would" "should" "could" "ought" "i'm" "you're" "he's" "she's" "it's" "we're" "they're" "i've" "you've" "we've" "they've" "i'd" "you'd" "he'd" "she'd" "we'd" "they'd" "i'll" "you'll"
## [77] "he'll" "she'll" "we'll" "they'll" "isn't" "aren't" "wasn't" "weren't" "hasn't" "haven't" "hadn't" "doesn't" "don't" "didn't" "won't" "wouldn't" "shan't" "shouldn't" "can't" "cannot" "couldn't" "mustn't" "let's" "that's" "who's" "what's" "here's" "there's" "when's" "where's" "why's" "how's" "a" "an" "the" "and" "but" "if" "or" "because" "as" "until" "while" "of" "at" "by" "for" "with" "about" "against" "between" "into" "through" "during" "before" "after" "above" "below" "to" "from" "up" "down" "in" "out" "on" "off" "over" "under" "again" "further" "then" "once" "here" "there" "when" "where"
## [153] "why" "how" "all" "any" "both" "each" "few" "more" "most" "other" "some" "such" "no" "nor" "not" "only" "own" "same" "so" "than" "too" "very" "will"
## drop every token that matches the built-in English stopword list
sel_toks <- toks %>%
  tokens_select(pattern = stopwords("en"), selection = "remove")
dfx <- dfm(sel_toks)Remove additional words that are not part of the stopwords-vector, such as “much”, “every”, and “never”. Hint: Two or more vectors can be concatenated by putting them into one and the same vector.
?tokens_select
## remove the standard stopwords plus the extra filler words named in the
## exercise ("much" was missing from the original pattern)
sel_toks <- tokens_select(
  toks,
  pattern = c(stopwords("en"), "much", "every", "never"),
  selection = "remove",
  valuetype = "glob"
)
## rebuild the document-feature matrix from the filtered tokens; without this
## step dfx still holds the previous matrix and the extra removals never show
## up in topfeatures()
dfx <- dfm(sel_toks)
## NOTE: the listing printed below in this handout was generated before dfx
## was rebuilt here, so it still contains "much", "every" and "never"
# again: 10 features for every document, now without stopwords
topfeatures(dfx, n = 10, groups = docnames(dfx))## $`1789-Washington`
## can every government public may present country one citizens duty
## 9 9 8 6 6 5 5 4 4 4
##
## $`1793-Washington`
## shall now oath present country voice called citizens administration confidence
## 3 2 2 1 1 1 1 1 1 1
##
## $`1797-Adams`
## people government may nations country can states nation constitution foreign
## 20 16 13 11 9 9 9 9 8 8
##
## $`1801-Jefferson`
## government us may let one shall principle man citizens fellow
## 12 10 8 7 6 6 6 6 5 5
##
## $`1805-Jefferson`
## public citizens may fellow state among shall us can time
## 14 10 10 8 8 7 7 7 6 6
##
## $`1809-Madison`
## public nations can country well states rights peace confidence united
## 6 6 5 4 4 4 4 4 3 3
##
## $`1813-Madison`
## war country every united british states nation without spirit citizens
## 15 5 5 5 5 4 4 4 4 3
##
## $`1817-Monroe`
## states government great people every us united just may union
## 21 21 21 15 14 14 13 10 10 10
##
## $`1821-Monroe`
## great states united war may made citizens every government people
## 29 20 16 16 15 15 14 13 12 11
##
## $`1825-Adams`
## union government upon rights country public great peace first nation
## 20 17 16 10 9 9 9 9 8 8
##
## $`1829-Jackson`
## public government shall can power may people national whose duties
## 8 6 6 5 5 4 4 4 3 3
##
## $`1833-Jackson`
## government people union states powers upon general may united preservation
## 13 9 9 8 5 5 5 4 4 4
##
## $`1837-VanBuren`
## every people institutions government country upon us may can never
## 20 20 16 15 13 13 12 11 9 8
##
## $`1841-Harrison`
## power people government constitution may upon one can executive states
## 47 38 36 36 34 34 26 26 25 24
##
## $`1845-Polk`
## government states union one people powers constitution country interests upon
## 45 36 32 19 16 16 15 14 14 14
##
## $`1849-Taylor`
## shall government country duties may interests constitution us congress day
## 15 7 6 5 4 4 4 4 4 3
##
## $`1853-Pierce`
## upon can power government every may shall must us states
## 24 14 11 10 9 9 9 9 9 8
##
## $`1857-Buchanan`
## states shall constitution may people government great question country public
## 22 18 17 15 13 13 11 11 9 9
##
## $`1861-Lincoln`
## can constitution people union states government shall now upon one
## 28 24 20 20 19 18 17 15 15 14
##
## $`1865-Lincoln`
## war shall god years union let may right must us
## 12 5 5 4 4 4 3 3 3 3
##
## $`1869-Grant`
## country now every public may without us laws best dollar
## 8 8 6 5 5 5 5 5 5 5
##
## $`1873-Grant`
## country people made best great office one can government good
## 8 7 6 6 5 5 4 4 4 4
##
## $`1877-Hayes`
## country government upon public states political people great party citizens
## 20 15 15 11 11 10 9 9 8 7
##
## $`1881-Garfield`
## people government states constitution can upon great union law nation
## 21 20 15 15 13 13 11 11 10 9
##
## $`1885-Cleveland`
## people government public shall constitution interests every citizens policy upon
## 16 16 11 10 8 7 5 5 5 5
##
## $`1889-Harrison`
## people upon states shall public laws may great constitution government
## 29 21 20 18 17 17 12 12 11 10
##
## $`1893-Cleveland`
## people government us can every public american support national service
## 19 13 10 9 9 8 8 7 6 6
##
## $`1897-McKinley`
## upon people government must congress great country can public every
## 31 25 23 23 18 16 14 13 13 12
##
## $`1901-McKinley`
## government people upon now united states executive congress us shall
## 13 12 11 10 9 9 9 9 8 7
##
## $`1905-Roosevelt`
## us life people must great nation problems men power cause
## 12 6 6 6 5 5 5 4 4 4
##
## $`1909-Taft`
## government business must can may upon proper congress race law
## 26 22 19 18 18 16 15 14 13 13
##
## $`1913-Wilson`
## great government life every men upon justice things nation shall
## 14 9 8 8 8 8 8 7 6 6
##
## $`1917-Wilson`
## upon shall us purpose action life world peace stand can
## 13 9 8 8 7 6 6 6 6 5
##
## $`1921-Harding`
## world must america war never civilization can new order may
## 23 23 15 13 12 12 11 11 10 10
##
## $`1925-Coolidge`
## can country must great people government world peace much upon
## 26 17 17 16 15 14 13 13 12 12
##
## $`1929-Hoover`
## government can upon progress people world must peace justice nation
## 24 17 17 16 15 15 15 15 14 12
##
## $`1933-Roosevelt`
## can national must may people shall leadership helped nation world
## 11 9 9 8 7 7 7 7 6 6
##
## $`1937-Roosevelt`
## government people can nation good men see democracy power progress
## 15 11 9 9 8 8 8 8 7 7
##
## $`1941-Roosevelt`
## nation know spirit democracy life us people america years freedom
## 11 10 9 9 8 8 7 7 6 6
##
## $`1945-Roosevelt`
## shall peace learned men today can way test life fellow
## 7 6 5 4 4 3 3 3 2 2
##
## $`1949-Truman`
## nations world can peace people freedom free united must security
## 22 22 16 14 12 12 11 10 9 9
##
## $`1953-Eisenhower`
## free world faith peace shall us people must upon freedom
## 21 14 13 12 11 11 10 10 10 10
##
## $`1957-Eisenhower`
## may nations world peace freedom people seek can must upon
## 15 14 14 11 11 10 10 9 9 6
##
## $`1961-Kennedy`
## let us can world sides new pledge citizens nations free
## 16 12 9 8 8 7 7 5 5 5
##
## $`1965-Johnson`
## us change nation must people union man world old every
## 12 12 11 10 9 9 9 7 7 6
##
## $`1969-Nixon`
## us can people world peace let know now make earth
## 20 17 14 13 12 11 10 9 9 9
##
## $`1973-Nixon`
## us let peace world new can america responsibility government great
## 26 22 19 16 15 14 13 11 10 9
##
## $`1977-Carter`
## can nation new must us people together strength spirit human
## 13 10 9 8 8 7 7 7 6 5
##
## $`1981-Reagan`
## us government must believe people americans one time world freedom
## 25 16 10 10 9 9 8 8 8 8
##
## $`1985-Reagan`
## us people world one government freedom must time now human
## 27 16 15 14 13 13 12 10 10 9
##
## $`1989-Bush`
## new us can great nation world free must hand good
## 14 13 11 10 10 10 9 9 8 8
##
## $`1993-Clinton`
## world must america us people today new let change americans
## 18 18 15 13 12 10 9 9 9 9
##
## $`1997-Clinton`
## new us century nation time every people america land one
## 29 27 20 13 12 11 11 11 11 10
##
## $`2001-Bush`
## us country citizens story nation america can every must never
## 11 9 9 9 8 8 6 6 6 5
##
## $`2005-Bush`
## freedom liberty america every one nation country world americans america's
## 25 15 12 10 9 9 8 8 8 8
##
## $`2009-Obama`
## us can nation new every must america people less let
## 23 13 12 11 8 8 8 7 7 7
##
## $`2013-Obama`
## us must people time can every together make one country
## 21 17 11 10 7 7 7 7 6 6
##
## $`2017-Trump`
## america american people country one every never great nation new
## 18 11 10 9 8 7 6 6 6 6
##
## $`2021-Biden`
## us america can one nation must democracy people another american
## 27 18 16 15 12 10 10 9 9 9
# we can also compute topfeatures by any docvar
# list the document-level variables that are available for grouping
docvars(dfx)
# ten most frequent features per party
topfeatures(dfx, n = 10, groups = Party)
## $Democratic
## us people can government must nation world new shall every
## 222 199 173 143 138 126 118 113 111 109
##
## $`Democratic-Republican`
## government great states war may public every us union country
## 68 61 56 51 49 48 45 44 42 40
##
## $Federalist
## people government may nations country can states nation constitution foreign
## 20 16 13 11 9 9 9 9 8 8
##
## $none
## can every government may present country public shall citizens people
## 9 9 9 7 6 6 6 6 5 5
##
## $Republican
## people government can us must upon world great country peace
## 264 240 228 218 201 192 180 159 147 139
##
## $Whig
## government states people power constitution may upon union one country
## 88 61 57 57 55 51 50 47 45 42
Print topfeatures per President.
# ten most frequent features per value of the President docvar;
# counts are pooled over all speeches sharing that value, so presidents
# with the same surname (e.g. the Roosevelts) end up in one group
topfeatures(dfx, n = 10, groups = President)
## $Adams
## government people union upon country nations nation may constitution public
## 33 27 22 21 18 18 17 16 16 15
##
## $Biden
## us america can one nation must democracy people another american
## 27 18 16 15 12 10 10 9 9 9
##
## $Buchanan
## states shall constitution may people government great question country public
## 22 18 17 15 13 13 11 11 9 9
##
## $Bush
## freedom nation us america can world must country new time
## 36 27 27 27 24 21 21 20 20 18
##
## $Carter
## can nation new must us people together strength spirit human
## 13 10 9 8 8 7 7 7 6 5
##
## $Cleveland
## people government public every shall us interests american can upon
## 35 29 19 14 14 14 12 12 11 11
##
## $Clinton
## us new world must america people century time nation let
## 40 38 28 28 26 23 21 19 18 18
##
## $Coolidge
## can country must great people government world peace much upon
## 26 17 17 16 15 14 13 13 12 12
##
## $Eisenhower
## world free peace nations freedom people may must upon can
## 28 26 23 21 21 20 19 19 16 15
##
## $Garfield
## people government states constitution can upon great union law nation
## 21 20 15 15 13 13 11 11 10 9
##
## $Grant
## country now best people office one without upon can every
## 16 11 11 9 9 8 8 8 7 7
##
## $Harding
## world must america war never civilization can new order may
## 23 23 15 13 12 12 11 11 10 10
##
## $Harrison
## people upon power constitution may government states great can executive
## 67 55 53 47 46 46 44 33 31 30
##
## $Hayes
## country government upon public states political people great party citizens
## 20 15 15 11 11 10 9 9 8 7
##
## $Hoover
## government can upon progress people world must peace justice nation
## 24 17 17 16 15 15 15 15 14 12
##
## $Jackson
## government people public states union shall can may power powers
## 19 13 11 10 10 9 8 8 7 6
##
## $Jefferson
## public may us citizens government fellow shall state can peace
## 18 18 17 15 15 13 13 10 9 9
##
## $Johnson
## us change nation must people union man world old every
## 12 12 11 10 9 9 9 7 7 6
##
## $Kennedy
## let us can world sides new pledge citizens nations free
## 16 12 9 8 8 7 7 5 5 5
##
## $Lincoln
## can union constitution shall people states government now one may
## 28 24 24 22 20 19 19 17 16 16
##
## $Madison
## war country public united states every nations can nation without
## 16 9 8 8 8 7 7 6 6 6
##
## $McKinley
## upon people government must congress united states great now public
## 42 37 36 27 27 21 21 21 20 19
##
## $Monroe
## great states government united every people war may made us
## 50 41 33 29 27 26 26 25 20 20
##
## $Nixon
## us let can peace world new people america make government
## 46 33 31 31 29 23 20 18 16 15
##
## $Obama
## us must can people nation new time every america now
## 44 25 20 18 18 17 16 15 14 11
##
## $Pierce
## upon can power government every may shall must us states
## 24 14 11 10 9 9 9 9 9 8
##
## $Polk
## government states union one people powers constitution country interests upon
## 45 36 32 19 16 16 15 14 14 14
##
## $Reagan
## us government people world one must freedom time americans now
## 52 29 25 23 22 22 21 18 16 15
##
## $Roosevelt
## people us nation can government must men shall life democracy
## 32 32 31 25 23 23 22 22 21 20
##
## $Taft
## government business must can may upon proper congress race law
## 26 22 19 18 18 16 15 14 13 13
##
## $Taylor
## shall government country duties may interests constitution us congress day
## 15 7 6 5 4 4 4 4 4 3
##
## $Truman
## nations world can peace people freedom free united must security
## 22 22 16 14 12 12 11 10 9 9
##
## $Trump
## america american people country one every never great nation new
## 18 11 10 9 8 7 6 6 6 6
##
## $`Van Buren`
## every people institutions government country upon us may can never
## 20 20 16 15 13 13 12 11 9 8
##
## $Washington
## can every government may present country public shall citizens people
## 9 9 9 7 6 6 6 6 5 5
##
## $Wilson
## upon great shall life us men nation things justice purpose
## 21 18 15 14 13 11 11 11 10 10
Sometimes we want to analyze certain indicators at the sentence level. To show how to do so, we will compute the per-sentence sentiment in Biden’s 2021 speech.
## step 1: keep only the documents whose President docvar is 'Biden'
biden <- df %>%
  corpus_subset(President == 'Biden')
## step 2: split the full text into one document per sentence
sentences <- biden %>%
  corpus_reshape(to = 'sentences')
## print the reshaped corpus
sentences
## Corpus consisting of 216 documents and 4 docvars.
## 2021-Biden.1 :
## "Chief Justice Roberts, Vice President Harris, Speaker Pelosi..."
##
## 2021-Biden.2 :
## "This is America's day."
##
## 2021-Biden.3 :
## "This is democracy's day."
##
## 2021-Biden.4 :
## "A day of history and hope."
##
## 2021-Biden.5 :
## "Of renewal and resolve."
##
## 2021-Biden.6 :
## "Through a crucible for the ages America has been tested anew..."
##
## [ reached max_ndoc ... 210 more documents ]
## 3rd step: within-sentence word tokenization
# tokenize into words and drop punctuation
# (TRUE is spelled out: the shorthand T is an ordinary variable that can be reassigned)
sentence_toks <- tokens(sentences, what = 'word', remove_punct = TRUE)
# make lower case
sentence_toks <- tokens_tolower(sentence_toks)
# remove English stopwords
sentence_toks <- tokens_select(sentence_toks, pattern = stopwords("en"),
                               selection = "remove")
## select a sentiment dictionary
## we use the Proksch et al. (2015) dictionary native to quanteda
data_dictionary_LSD2015
## Dictionary object with 4 key entries.
## - [negative]:
## - a lie, abandon*, abas*, abattoir*, abdicat*, aberra*, abhor*, abject*, abnormal*, abolish*, abominab*, abominat*, abrasiv*, absent*, abstrus*, absurd*, abus*, accident*, accost*, accursed* [ ... and 2,838 more ]
## - [positive]:
## - ability*, abound*, absolv*, absorbent*, absorption*, abundanc*, abundant*, acced*, accentuat*, accept*, accessib*, acclaim*, acclamation*, accolad*, accommodat*, accomplish*, accord, accordan*, accorded*, accords [ ... and 1,689 more ]
## - [neg_positive]:
## - best not, better not, no damag*, no no, not ability*, not able, not abound*, not absolv*, not absorbent*, not absorption*, not abundanc*, not abundant*, not acced*, not accentuat*, not accept*, not accessib*, not acclaim*, not acclamation*, not accolad*, not accommodat* [ ... and 1,701 more ]
## - [neg_negative]:
## - not a lie, not abandon*, not abas*, not abattoir*, not abdicat*, not aberra*, not abhor*, not abject*, not abnormal*, not abolish*, not abominab*, not abominat*, not abrasiv*, not absent*, not abstrus*, not absurd*, not abus*, not accident*, not accost*, not accursed* [ ... and 2,840 more ]
## apply dictionary to the Biden's speech
# only the first two dictionary keys (negative, positive) are looked up
toks_lsd <- tokens_lookup(sentence_toks, dictionary = data_dictionary_LSD2015[1:2])
dfm_lsd <- dfm(toks_lsd)
## compute the share of negative and positive matches per sentence
## over the course of the speech
# convert the dfm to a wide data frame
df_lsd <- convert(dfm_lsd, to = "data.frame")
df_lsd
# melt to a long table: one row per sentence (doc_id) and sentiment category
df_lsd <- melt(df_lsd, id.vars = 'doc_id', variable.name = 'sentiment', value.name = 'n')
head(df_lsd)
# group by sentence (doc_id) and compute percentages
# NOTE: a sentence without any dictionary match yields NaN here (0 / 0)
df_lsd <- df_lsd %>%
  group_by(doc_id) %>%
  mutate(perc = n/sum(n))
head(df_lsd)
# give every sentence a numeric value corresponding to doc ID
# (the digits following the dot in doc_id, e.g. 55 for '2021-Biden.55')
df_lsd <- df_lsd %>%
  ungroup %>%
  mutate(num_id = as.numeric(stri_extract(doc_id, regex = '(?<=\\.)[0-9]+')))
## plot results
df_lsd %>%
  ggplot(aes(x = num_id, y = perc, group = sentiment, colour = sentiment)) +
  # one smoothed trend line per sentiment category
  geom_smooth() +
  # geom_point(alpha = 0.5) +
  labs(
    title = "Biden's 2021 speech: sentiment per sentence, smoothed",
    x = 'Sentence Number Within Speech',
    y = 'Frequency'
  ) +
  # x axis without padding, y axis formatted as percentages
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(labels = scales::percent, expand = c(0.01, 0.01)) +
  theme_classic() +
  theme(plot.title = element_text(face = 'bold'))
## inspect sentence-tokens vectors 55-65
sentence_toks[55:65]
## Tokens consisting of 11 documents and 4 docvars.
## 2021-Biden.55 :
## [1] "can" "right" "wrongs"
##
## 2021-Biden.56 :
## [1] "can" "put" "people" "work" "good" "jobs"
##
## 2021-Biden.57 :
## [1] "can" "teach" "children" "safe" "schools"
##
## 2021-Biden.58 :
## [1] "can" "overcome" "deadly" "virus"
##
## 2021-Biden.59 :
## [1] "can" "reward" "work" "rebuild" "middle" "class" "make" "health" "care" "secure"
##
## 2021-Biden.60 :
## [1] "can" "deliver" "racial" "justice"
##
## [ reached max_ndoc ... 5 more documents ]
## uh.. this doesn't look like it's meant negatively.
## let's double check by reading the untokenized sentences
## (same index range as the token inspection, so the documents line up)
sentences[55:65]
## Corpus consisting of 11 documents and 4 docvars.
## 2021-Biden.55 :
## "We can right wrongs."
##
## 2021-Biden.56 :
## "We can put people to work in good jobs."
##
## 2021-Biden.57 :
## "We can teach our children in safe schools."
##
## 2021-Biden.58 :
## "We can overcome this deadly virus."
##
## 2021-Biden.59 :
## "We can reward work, rebuild the middle class, and make healt..."
##
## 2021-Biden.60 :
## "We can deliver racial justice."
##
## [ reached max_ndoc ... 5 more documents ]
## as expected, Biden is mentioning ISSUES, but in a combative way
Let’s compare the word embeddings for ‘progress’, ‘spirit’, ‘world’, ‘nation’, ‘duty’, and ‘war’ between Democrats and Republicans. Here, the embedding of a keyword simply means the words that occur within a window of +/-10 words around it.
## tokenization
toks <- tokens(df, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
## protect the acronym 'US' from being lower-cased into the stopword 'us'.
## tokens_replace() compares whole tokens using glob patterns and ignores
## case by default, so a regex such as '\\bUS\\b' never matches anything;
## an exact, case-sensitive match is what is needed here
toks <- tokens_replace(toks,
                       pattern = 'US',
                       replacement = 'USA',
                       valuetype = 'fixed',
                       case_insensitive = FALSE)
toks <- tokens_tolower(toks)
## lemmatizing
toks <- tokens_replace(toks,
                       pattern = lexicon::hash_lemmas$token,
                       replacement = lexicon::hash_lemmas$lemma)
## remove stopwords
# custom stopwords
cstmwrds <- c('upon', 'can', 'us', 'let', 'may', 'make',
              'must', 'many', 'shall', 'without', 'among',
              'much', 'every', 'ever', 'know', 'new', 'never',
              'year', 'find', 'see', 'good')
# remove them all
toks <- tokens_select(toks, pattern = c(stopwords("en"), cstmwrds),
                      selection = "remove")
## define the keywords
query <- c('progress', 'spirit', 'world', 'nation', 'duty', 'war')
## subset data by the party-affiliation
# one tokens object per party
demo <- toks %>% tokens_subset(Party == 'Democratic')
repub <- toks %>% tokens_subset(Party == 'Republican')
## keep the keyword 'duty' together with the 10 tokens on either side of it
toks_demo <- demo %>%
  tokens_select(pattern = 'duty', selection = "keep", window = 10,
                padding = FALSE, verbose = TRUE)
toks_repub <- repub %>%
  tokens_select(pattern = 'duty', selection = "keep", window = 10,
                padding = FALSE, verbose = TRUE)
## feature co-occurrence matrices (fcm), weighted within a 10-token window:
## the further two words are apart, the less their co-occurrence counts
dfcmat_demo <- toks_demo %>%
  fcm(context = 'window', window = 10, count = 'weighted', tri = FALSE)
dfcmat_repub <- toks_repub %>%
  fcm(context = 'window', window = 10, count = 'weighted', tri = FALSE)
# have a look at one of the fcms
dfcmat_demo
## Feature co-occurrence matrix of: 641 by 641 features.
## features
## features life event day month one hand country voice health time
## life 0 0 0 0 0 0 0 0 0 0
## event 0 0 0 0 0.1428571 0 0 0 0 0
## day 0 0 0 0 0 0 0 0 0 0
## month 0 0 0 0 0 0 0 0 0 0
## one 0 0.1428571 0 0 0 0.1666667 0 0 0 0
## hand 0 0 0 0 0.1666667 0 0 0 0 0
## country 0 0 0 0 0 0 0 0 0 0
## voice 0 0 0 0 0 0 0 0 0 0
## health 0 0 0 0 0 0 0 0 1 0
## time 0 0 0 0 0 0 0 0 0 0
## [ reached max_feat ... 631 more features, reached max_nfeat ... 631 more features ]
## subset each fcm to its 51 top features
## (presumably the keyword itself plus its 50 top co-occurring terms)
dfcmat_demo <- fcm_select(dfcmat_demo,
                          pattern = names(topfeatures(dfcmat_demo, 51)),
                          selection = "keep")
dfcmat_repub <- fcm_select(dfcmat_repub,
                           pattern = names(topfeatures(dfcmat_repub, 51)),
                           selection = "keep")
## relative weight of each term: its row sum divided by the smallest row sum;
## computed once and reused for both the label sizes and the vertex sizes
rel_demo <- rowSums(dfcmat_demo) / min(rowSums(dfcmat_demo))
## compute varying word-label sizes for each term based on its frequency
label_sizes_demo <- rel_demo * 1.2
## override the size for the keyword 'duty' (cannibalizes the whole space of the plot)
label_sizes_demo['duty'] <- 0.1
set.seed(123) # set seed for reproducibility
p_demo <- quanteda.textplots::textplot_network(dfcmat_demo,
                                               min_freq = 0.5,
                                               edge_alpha = 0.2,
                                               vertex_size = rel_demo / 8,
                                               vertex_labelsize = label_sizes_demo,
                                               edge_color = 'dodgerblue') +
  # additional theme tweaks
  labs(title = 'Democratic: duty') +
  theme(plot.title = element_text(face = 'bold'))
## same for republicans
rel_repub <- rowSums(dfcmat_repub) / min(rowSums(dfcmat_repub))
label_sizes_repub <- rel_repub * 1.2
label_sizes_repub['duty'] <- 0.1
# re-seed so that this plot is reproducible on its own, independent of
# whether the Democratic plot was drawn first
set.seed(123)
p_repub <- quanteda.textplots::textplot_network(dfcmat_repub,
                                                min_freq = 0.5,
                                                edge_alpha = 0.2,
                                                vertex_size = rel_repub / 8,
                                                vertex_labelsize = label_sizes_repub,
                                                edge_color = 'firebrick') +
  # additional theme tweaks
  labs(title = 'Republican: duty') +
  theme(plot.title = element_text(face = 'bold'))
## plot both networks side by side
grid.arrange(p_demo, p_repub, ncol = 2)
## feature co-occurrence networks for all keywords and both parties
container <- list() # plot-container, filled with one plot per party and keyword
for (m in c('Democratic', 'Republican')) { # loop over party
  ## subset data by the party-affiliation in <m>
  ## (done once per party: the subset does not depend on the keyword)
  toks_party <- tokens_subset(toks, Party == m)
  ## edge colour by party; plain if/else because <m> is a single string
  edge_col <- if (m == 'Republican') 'firebrick' else 'dodgerblue'
  for (i in query) { # loop over keywords
    ## select tokens within +/- 10 words around the keyword in <i>
    toks_sel <- tokens_select(toks_party, pattern = i, selection = "keep",
                              window = 10, padding = FALSE, verbose = TRUE)
    ## create feature co-occurrence matrix (fcm) with weights within the window
    ## the more distance between the words, the less weight the co-occurrence gets
    dfcmat <- fcm(toks_sel, context = 'window', window = 10,
                  count = 'weighted', tri = FALSE)
    ## names of the 51 top features of the fcm
    ## (presumably the keyword in <i> plus its 50 top co-occurring terms)
    feat <- names(topfeatures(dfcmat, 51))
    ## subset the fcm to those features
    dfcmat_sel <- fcm_select(dfcmat, pattern = feat, selection = "keep")
    ## create plot
    # relative weight of each term: its row sum divided by the smallest row sum
    rel_weight <- rowSums(dfcmat_sel) / min(rowSums(dfcmat_sel))
    # compute varying word-label sizes for each term based on its frequency
    label_sizes <- rel_weight * 0.8
    # override the size for the term in <i> (cannibalizes the whole space of the plot);
    # only if <i> is among the selected features, otherwise the assignment
    # would append an element and the sizes would no longer match the vertices
    if (i %in% names(label_sizes)) {
      label_sizes[i] <- 0.1
    }
    set.seed(123) # set seed for reproducibility
    p <- quanteda.textplots::textplot_network(dfcmat_sel,
                                              min_freq = 0.5,
                                              edge_alpha = 0.2,
                                              vertex_size = rel_weight / 8,
                                              vertex_labelsize = label_sizes,
                                              edge_color = edge_col) +
      # additional theme tweaks
      labs(title = paste0(m, ': ', i)) +
      theme(plot.title = element_text(face = 'bold'))
    ## populate the container
    container[[paste0(m, ': ', i)]] <- p
  }
}
## plot panel
names(container)
## [1] "Democratic: progress" "Democratic: spirit" "Democratic: world" "Democratic: nation" "Democratic: duty" "Democratic: war" "Republican: progress" "Republican: spirit" "Republican: world" "Republican: nation" "Republican: duty" "Republican: war"
## one row per keyword: Democratic plot on the left, Republican plot on the right.
## The order is derived from <query> instead of hard-coded positions, so the
## panel stays correct when keywords are added or removed
panel_order <- as.vector(rbind(paste0('Democratic: ', query),
                               paste0('Republican: ', query)))
grid.arrange(grobs = unname(container[panel_order]), ncol = 2)
A work by Lucien Baumgartner
lucien.baumgartner@philos.uzh.ch
https://lucienbaumgartner.github.io/